##Connect to Database This Practicum will use a remote instance of MySQL on db4free.net.This database is very easy to set up and there is no configuration or management.

# 1. Library 
library(RMySQL)
## Loading required package: DBI
library(DBI)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## 
## Attaching package: 'RSQLite'
## The following object is masked from 'package:RMySQL':
## 
##     isIdCurrent
## sqldf will default to using MySQL
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tinytex)

# 2. Settings
db_user <- 'sarthak55k'
db_password <- 'Sarthak@ubs123'
db_name <- 'db5200practicum1'
db_host <- 'db4free.net' 
db_port <- 3306 

# 3. Connect to DB
mydb <-  dbConnect(MySQL(), user = db_user, password = db_password,
                 dbname = db_name, host = db_host, port = db_port)

##Create Database

#4A. Create table incidents This table will stores wildlife strike incidents.


DROP TABLE IF EXISTS incidents;

CREATE TABLE incidents(
  rid INTEGER NOT NULL  PRIMARY KEY,
  `dep.date` DATE NULL,
  origin INTEGER NULL,
  airline INTEGER NULL,
  aircraft TEXT NULL,
  `flight.phase` SET ("takeoff", "landing", "inflight", "unknown") NOT NULL DEFAULT 'unknown',
  altitude INTEGER NULL,
  conditions VARCHAR(255) NULL,
  warned BOOLEAN NOT NULL DEFAULT FALSE,
  CHECK(altitude >= 0)
)

#4B. Create table airports This table will stores airport name, airport code and states.


DROP TABLE IF EXISTS airports;

CREATE TABLE airports(
  aid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,
  airportName TEXT NOT NULL,
  airportCode TEXT NULL,
  state TEXT
)

#4C. Setup origin foreign key Given chunk will link the incidents and airports tables via the origin foreign key in incidents to the primary key aid in airports.


ALTER TABLE incidents ADD CONSTRAINT airports_origin_fkey FOREIGN KEY (origin) REFERENCES airports(aid);

#4D. Create lookup table conditions This table will link incidents table with conditions foreign key.


DROP TABLE IF EXISTS conditions;

CREATE TABLE conditions(
  cid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,
  `condition` VARCHAR(255) NOT NULL,
  explanation TEXT NULL,
  UNIQUE (`condition`)
)

Make conditions column a foreign key.


ALTER TABLE incidents MODIFY conditions INTEGER;

ALTER TABLE incidents ADD CONSTRAINT conditions_fkey FOREIGN KEY (conditions) REFERENCES conditions(cid);

#4E. Create table airlines This table will stores airlines name, airlines code and flag.

DROP TABLE IF EXISTS airlines;
CREATE TABLE airlines(
  eid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,
  airlineName TEXT NOT NULL,
  airlineCode TEXT NULL,
  flag TEXT NULL
)

#4F. Setup origin foreign key Given chunk will link the incidents and airlines tables via the airline foreign key in incidents to the primary key eid in airlines.


ALTER TABLE incidents ADD CONSTRAINT airports_airlines_fkey FOREIGN KEY (airline) REFERENCES airlines(eid);
bds.raw <- read.csv("BirdStrikesData-V2.csv", stringsAsFactors = FALSE)

bds.raw 

#Transformaing the flight phase.

unique(bds.raw$flight_phase)
## [1] "Climb"        "Landing Roll" "Approach"     "Take-off run" "Descent"     
## [6] ""             "Taxi"         "Parked"
x <- c("Parked","Taxi","Take-off run","Climb", "Approach", "Descent", "Landing Roll","")
y <- c("unknown","unknown","takeoff","takeoff","inflight","landing","landing","unknown")
bds.raw$flight_phase <- mapvalues(bds.raw$flight_phase, 
          from=x, 
          to=y)
bds.raw$flight_date[bds.raw$flight_date == "" | bds.raw$flight_date == "N/A" | bds.raw$flight_date == "UNKNOWN" | is.na(bds.raw$flight_date) == TRUE ] <- "1/1/2000 0:00"
view1 <- distinct(bds.raw[,c(3,10)],.keep_all=TRUE)
view1$airport[view1$airport == "" | view1$airport == "N/A" | view1$airport == "UNKNOWN" | is.na(view1$airport) == "TRUE"] <- "unknown" 
view1$origin[view1$origin == "" | view1$origin == "N/A" | view1$origin == "UNKNOWN" | is.na(view1$origin) == "TRUE"] <- "unknown"
view1
unique(view1$airport[view1$airport == "unknown"])
## [1] "unknown"
query <- paste0("INSERT INTO airports (airportName, state) VALUES ", 
                paste(sprintf('("%s", "%s")', view1$airport, view1$origin), 
                      collapse = ","))
dbExecute(mydb, query)
## [1] 1141

#updating the view1

view1 <- dbGetQuery(mydb,"SELECT aid, airportName AS airport FROM airports")
view1
view2 <- distinct(bds.raw['airline'],.keep_all=TRUE)
view2$airline[view2$airline == "" | view2$airline == "N/A" | view2$airline == "UNKNOWN" | is.na(view2$airline) == "TRUE"] <- "unknown" 
View(view2)
unique(view2$airline[view2$airline == "UNKNOWN"])
## character(0)
query <- paste0("INSERT INTO airlines (airlineName) VALUES ", 
                paste(sprintf('("%s")', view2$airline), 
                      collapse = ","))
dbExecute(mydb, query)
## [1] 293

#updating the view2

view2 <- dbGetQuery(mydb,"SELECT eid, airlineName AS airline FROM airlines")
view2
view3 <- distinct(bds.raw["sky_conditions"],.keep_all = TRUE)
view3
query <- paste0("INSERT INTO conditions (`condition`) VALUES ", 
                paste(sprintf("('%s')", view3$sky_conditions), 
                      collapse = ","))
dbExecute(mydb, query)
## [1] 3

#updating the view3

view3 <- dbGetQuery(mydb,"SELECT cid,`condition` AS sky_conditions FROM conditions")
view3
temp <-left_join(left_join(left_join(bds.raw,view1, by = "airport"),view2, by = "airline"),view3, by = "sky_conditions")
temp

#c(1,7,2,11,18,17)

view4 <- temp[,c(1,7,20,21,2,11,18,22,17)]

view4$flight_date <- gsub('-', '/', view4$flight_date)
view4$flight_date <- as.Date(view4$flight_date, "%m/%d/%y")
 

#view2$airlines[view1$airlines == ""] <- "unknown"
view4

#view4\(rid,view4\)flight_date,view4\(aid,view4\)eid,view4\(aircraft,view4\)flight_phase,view4\(cid,view4\)pilot_warned_flag,

query <- paste0("INSERT INTO incidents (rid, `dep.date`, origin, airline, aircraft, `flight.phase`, altitude, `conditions`, warned) VALUES ", 
                paste(sprintf("('%s', NULLIF('%s', 'NA'), NULLIF('%d', 'NA'), NULLIF('%d', 'NA'),'%s', '%s', NULLIF('%s', 'NA'), NULLIF('%d', 'NA'), '%d')",
                              view4$rid, view4$flight_date, view4$aid, view4$eid, view4$aircraft, view4$flight_phase, as.numeric(gsub(",", "", view4$altitude_ft)), view4$cid, ifelse(view4$pilot_warned_flag == 'Y', TRUE, FALSE)),
                      collapse = ","))

dbExecute(mydb, query)
## [1] 25558

select * from incidents
#DELETE FROM incidents
Displaying records 1 - 10
rid dep.date origin airline aircraft flight.phase altitude conditions warned
1195 2020-11-13 37 21 Airplane inflight 2000 3 1
3019 2020-10-10 717 21 Airplane takeoff 400 1 1
3500 2020-05-15 37 21 Airplane inflight 1000 1 1
3504 2020-05-23 37 21 Airplane inflight 1800 1 1
3597 2020-04-18 123 21 Airplane inflight 200 2 1
4064 2020-04-06 37 21 Airplane inflight 1000 1 1
4074 2020-07-15 180 21 Airplane takeoff 0 1 1
4076 2020-07-15 37 21 Airplane takeoff 500 2 1
4090 2020-07-02 114 21 Airplane takeoff 50 2 1
4091 2020-07-07 114 21 Airplane takeoff 0 2 1

#8 limit 10 is remaining

query <- "SELECT ar.state, count(*) NoOfIncidents FROM incidents as i LEFT JOIN airports as ar ON i.origin = ar.aid group by ar.state order by NoOfIncidents DESC LIMIT 10"
dbGetQuery(mydb,query)

#9

SELECT ai.airlineName, count(*) NoOfIncidents
  FROM incidents as i
  LEFT JOIN 
  airlines as ai
  ON i.airline = ai.eid
  GROUP BY ai.airlineName
  HAVING NoOfIncidents > 
    (
    SELECT avg(NoOfIncidents)
    FROM (
    SELECT ai.airlineName, count(*) NoOfIncidents
        FROM incidents as i
        LEFT JOIN 
        airlines as ai
        ON i.airline = ai.eid
        GROUP BY ai.airlineName
    ) as av
)
ORDER BY NoOfIncidents DESC
Displaying records 1 - 10
airlineName NoOfIncidents
SOUTHWEST AIRLINES 4628
BUSINESS 3074
AMERICAN AIRLINES 2058
DELTA AIR LINES 1349
AMERICAN EAGLE AIRLINES 932
SKYWEST AIRLINES 891
US AIRWAYS* 797
JETBLUE AIRWAYS 708
UPS AIRLINES 590
US AIRWAYS 540

#10 #in jan how many incident happend for a particular flight phase considering all years. #group by on month, flight phase #count(*)

SELECT MONTH(i.`dep.date`) as Month, i.`flight.phase`, count(*) NoOfIncidents 
FROM incidents as i
GROUP BY MONTH(i.`dep.date`),i.`flight.phase`
ORDER BY Month,NoOfIncidents;
Displaying records 1 - 10
Month flight.phase NoOfIncidents
1 unknown 133
1 landing 202
1 takeoff 357
1 inflight 374
2 unknown 2
2 landing 176
2 takeoff 293
2 inflight 301
3 unknown 9
3 landing 245
query <- "SELECT MONTH(i.`dep.date`) as Month, i.`flight.phase`, count(*) NoOfIncidents FROM incidents as i GROUP BY MONTH(i.`dep.date`),i.`flight.phase` ORDER BY Month;"

t <- dbGetQuery(mydb,query)
ggplot(t, aes(x = Month, y = NoOfIncidents)) +
    geom_point()

CREATE PROCEDURE SPAddNewIncident(
  nRid INTEGER,
  nDepDate DATE,
  nAirport TEXT,
  nOrigin TEXT,
  nAirline TEXT,
  nAircraft TEXT,
  nPhase TEXT,
  nConditions TEXT,
  nAltitude INTEGER,
  nWarned BOOLEAN
)
BEGIN

declare naid int default 0;
declare neid int default 0;
declare ncid int default 0;

IF (ISNULL(nRid))
THEN
SIGNAL SQLSTATE '42601'
SET MESSAGE_TEXT = 'rid can not be null!';
END IF;

IF (nAltitude < 0)
THEN
SIGNAL SQLSTATE '42601'
SET MESSAGE_TEXT = 'altitude can not be negative!';
END IF;

IF (nPhase not in ("takeoff", "landing", "inflight", "unknown"))
THEN
SIGNAL SQLSTATE '42601'
SET MESSAGE_TEXT = 'phase is incorrect!';
END IF;

  IF (nAltitude >= 0)
  THEN
  
    SET naid = (SELECT aid FROM airports WHERE airportName = nAirport AND state = nOrigin);
    IF (ISNULL(naid))
    THEN 
    
      INSERT INTO airports (airportName, state) VALUES (nAirport, nOrigin);
      SET naid = (SELECT aid FROM airports WHERE airportName = nAirport AND state = nOrigin);
      
    END IF;
    
    
    SET neid = (SELECT eid FROM airlines WHERE airlineName = nAirline);
    IF (ISNULL(neid))
    THEN 
    
      INSERT INTO airlines (airlineName) VALUES (nAirline);
      SET neid = (SELECT eid FROM airlines WHERE airlineName = nAirline);
      
    END IF;
  
    
    SET ncid = (SELECT cid FROM conditions WHERE `condition` = nConditions);
    
    INSERT INTO incidents (rid, `dep.date`, origin, airline, aircraft, `flight.phase`, altitude, `conditions`, warned)
    VALUES
    (nRid, nDepDate, naid, neid, nAircraft, nPhase, nAltitude, ncid, nWarned);
    
  END IF;
  
END
CALL SPAddNewIncident(11, '1997-06-10','LAGUARDIA NY', 'NY', 'AMERICAN AIRLINES', 'Airplane', 'unknown', 'No Cloud', 50, TRUE);
DROP PROCEDURE SPAddNewIncident;
# Disconnect database connection
dbDisconnect(mydb)
## [1] TRUE